// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// USB streaming data test
//
// This test requires interaction with the USB DPI model or a test application
// on the USB host. The test initializes the USB device and configures a set of
// endpoints for data streaming using bulk transfers.
//
// The DPI model mimics a USB host. After device initialization, it detects
// the assertion of the pullup and first assigns an address to the device.
// For this test it will then repeatedly fetch data via IN requests to
// each stream and propagate that data to the corresponding OUT endpoints.
//
// The data itself is pseudo-randomly generated by the sender and,
// independently, by the receiving code to check that the data has been
// propagated unmodified and without data loss, corruption, replication etc.

#include "sw/device/lib/dif/dif_pinmux.h"
#include "sw/device/lib/runtime/log.h"
#include "sw/device/lib/runtime/print.h"
#include "sw/device/lib/testing/pinmux_testutils.h"
#include "sw/device/lib/testing/test_framework/check.h"
#include "sw/device/lib/testing/test_framework/ottf_main.h"
#include "sw/device/lib/testing/usb_testutils.h"
#include "sw/device/lib/testing/usb_testutils_controlep.h"

#include "hw/top_earlgrey/sw/autogen/top_earlgrey.h"  // Generated.

// Maximum number of concurrent streams
#ifdef USBDEV_NUM_ENDPOINTS
// Endpoint zero implements the default control pipe, so it is excluded from
// the count of endpoints available for streaming
#define STREAMS_MAX (USBDEV_NUM_ENDPOINTS - 1U)
#else
// Fallback used when the endpoint count has not been defined
#define STREAMS_MAX 11U
#endif

// Number of streams to be tested; may be overridden at build time, otherwise
// it defaults to the maximum number of streams
#ifndef NUM_STREAMS
#define NUM_STREAMS STREAMS_MAX
#endif

// Maximum number of buffers simultaneously awaiting transmission
// (we must leave some available for packet reception); may be overridden at
// build time
#ifndef MAX_TX_BUFFERS
#define MAX_TX_BUFFERS 24U
#endif

// Number of bytes to be transferred when running on FPGA; test_main divides
// this figure between the streams.
// This takes about 256s presently with 10MHz CPU in CW310 FPGA and physical
// USB with randomized packet sizes and the default memcpy implementation;
// The _MEM_FASTER switch drops the run time to 187s
#define TRANSFER_BYTES_FPGA (0x10U << 20)

// Number of bytes to be transferred in simulation.
// This is appropriate for a Verilator chip simulation with 15 min timeout
#define TRANSFER_BYTES_VERILATOR 0x2400U

// This is about the amount that we can transfer within a 1 hour 'eternal' test
//#define TRANSFER_BYTES_LONG (0xD0U << 20)

// Stream signature words; these bracket the signature record that is written
// at the start of each stream
#define STREAM_SIGNATURE_HEAD 0x579EA01AU
#define STREAM_SIGNATURE_TAIL 0x160AE975U

// Seed numbers for the LFSR generators in each transfer direction for
// the given stream number. Each expansion is fully parenthesized so that the
// macros may be used safely within larger expressions.
#define USBTST_LFSR_SEED(s) ((uint8_t)(0x10U + (s) * 7U))
#define USBDPI_LFSR_SEED(s) ((uint8_t)(0x9BU - (s) * 7U))

// Buffer size randomization
#define BUFSZ_LFSR_SEED(s) ((uint8_t)(0x17U + (s) * 7U))

// Simple LFSR for 8-bit sequences
// Note: zero is an isolated state that shall be avoided
//
// The shifted state is promoted to a wider type by the usual integer
// promotions, so the result is explicitly truncated back to 8 bits here rather
// than relying upon every caller to assign it to a uint8_t.
// The argument is evaluated more than once; it must not have side effects.
#define LFSR_ADVANCE(lfsr)    \
  ((uint8_t)(((lfsr) << 1) ^  \
             ((((lfsr) >> 1) ^ ((lfsr) >> 2) ^ ((lfsr) >> 3) ^ \
               ((lfsr) >> 7)) & 1U)))

// Forward declaration to context state; the per-stream state holds a pointer
// back to the test context, whose full definition follows the stream type
typedef struct usbdev_stream_test_ctx usbdev_stream_test_ctx_t;

/**
 * Stream signature
 *
 * A record identifying a stream and its properties; it is written into the
 * first buffer prepared for each stream, ahead of any LFSR-generated data, and
 * is not included in the count of bytes transferred.
 *
 * Note: this needs to be transferred over a byte stream, so the structure is
 * packed and its bytes are written to the USB buffer as they lie in memory.
 */
typedef struct __attribute__((packed)) usbdev_stream_sig {
  /**
   * Head signature word (STREAM_SIGNATURE_HEAD)
   */
  uint32_t head_sig;
  /**
   * Initial value of LFSR (the transmission LFSR state of the stream at the
   * time the signature is created)
   */
  uint8_t init_lfsr;
  /**
   * Stream number
   */
  uint8_t stream;
  /**
   * Reserved fields; should be zero
   */
  uint8_t reserved1;
  uint8_t reserved2;
  /**
   * Number of bytes to be transferred
   */
  uint32_t num_bytes;
  /**
   * Tail signature word (STREAM_SIGNATURE_TAIL)
   */
  uint32_t tail_sig;
} usbdev_stream_sig_t;

// Sanity check because the host-side code relies upon the same structure;
// the signature must occupy exactly 16 bytes
static_assert(sizeof(usbdev_stream_sig_t) == 0x10U,
              "Host-side code relies upon signature structure");

/**
 * Context state for a single stream
 *
 * Each stream has a transmission side (data generated here and presented to
 * the USB device for collection by the host) and a reception side (data
 * returned by the host and checked here).
 */
typedef struct usbdev_stream {
  /**
   * Pointer to test context; callback functions receive only stream pointer
   */
  usbdev_stream_test_ctx_t *ctx;
  /**
   * Stream IDentifier
   */
  uint8_t id;
  /**
   * Has the stream signature been sent yet?
   */
  bool sent_sig;
  /**
   * USB device endpoint being used for data transmission
   */
  uint8_t tx_ep;
  /**
   * Transmission Linear Feedback Shift Register (for PRND data generation)
   */
  uint8_t tx_lfsr;
  /**
   * Total number of bytes presented to the USB device for transmission
   * (excludes the stream signature)
   */
  uint32_t tx_bytes;
  /**
   * Transmission-side LFSR for selection of buffer size; advanced each time
   * a randomized buffer size is chosen
   */
  uint8_t tx_buf_size;

  /**
   * USB device endpoint being used for data reception
   */
  uint8_t rx_ep;
  /**
   * Reception-side LFSR state (mirrors USBDPI generation of PRND data)
   */
  uint8_t rx_lfsr;
  /**
   * Reception-side shadow of transmission LFSR; advanced as received bytes
   * are checked, so that it tracks the data that was originally transmitted
   */
  uint8_t rxtx_lfsr;
  /**
   * Total number of bytes received from the USB device
   */
  uint32_t rx_bytes;
  /**
   * Size of transfer in bytes; the stream is complete once both the
   * transmitted and received byte counts have reached this figure
   */
  uint32_t transfer_bytes;
} usbdev_stream_t;

/**
 * Context state for streaming test
 */
struct usbdev_stream_test_ctx {
  /**
   * Context pointer (usb_testutils state for the USB device)
   */
  usb_testutils_ctx_t *usbdev;
  /**
   * State information for each of the test streams
   */
  usbdev_stream_t streams[STREAMS_MAX];
  /**
   * Per-endpoint limits on the number of buffers that may be queued for
   * transmission
   */
  uint8_t tx_bufs_limit[USBDEV_NUM_ENDPOINTS];
  /**
   * Per-endpoint counts of completed buffers queued for transmission
   */
  uint8_t tx_bufs_queued[USBDEV_NUM_ENDPOINTS];
  /**
   * Total number of completed buffers (summed across all endpoints)
   */
  uint8_t tx_queued_total;
  /**
   * Buffers that have been filled but cannot yet be presented for transmission
   * TODO - perhaps absorb the buffer queuing into usb_testutils because the dif
   * API is explicitly not robust against back-to-back sending of multiple
   * buffers to a single endpoint, and because the read performance is reliant
   * upon having additional buffer(s) already available for immediate
   * presentation
   */
  // Storage here is USBDEV_NUM_ENDPOINTS x MAX_TX_BUFFERS buffer descriptions;
  // entry [ep][0] is the buffer at the head of the queue for endpoint 'ep'.
  // This could perhaps be simplified at some point.
  dif_usbdev_buffer_t tx_bufs[USBDEV_NUM_ENDPOINTS][MAX_TX_BUFFERS];
};

/**
 * Configuration values for USB.
 *
 * A configuration descriptor head followed by eleven vendor interfaces. Each
 * interface is followed by two bulk endpoint descriptors that share a single
 * endpoint number (1 to 11) and use the maximum packet size.
 *
 * NOTE(review): the first argument of USB_BULK_EP_DSCR (0 or 1) presumably
 * selects the endpoint direction; confirm against the macro definition.
 */
static const uint8_t config_descriptors[] = {
    // Total length and interface count are derived from STREAMS_MAX
    USB_CFG_DSCR_HEAD(USB_CFG_DSCR_LEN + STREAMS_MAX * (USB_INTERFACE_DSCR_LEN +
                                                        2 * USB_EP_DSCR_LEN),
                      STREAMS_MAX),

    // Up to 11 interfaces and STREAMS_MAX in the descriptor head specifies how
    // many of the interfaces will be declared to the host
    VEND_INTERFACE_DSCR(0, 2, 0x50, 1),
    USB_BULK_EP_DSCR(0, 1U, USBDEV_MAX_PACKET_SIZE, 0),
    USB_BULK_EP_DSCR(1, 1U, USBDEV_MAX_PACKET_SIZE, 0),

    VEND_INTERFACE_DSCR(1, 2, 0x50, 1),
    USB_BULK_EP_DSCR(0, 2U, USBDEV_MAX_PACKET_SIZE, 0),
    USB_BULK_EP_DSCR(1, 2U, USBDEV_MAX_PACKET_SIZE, 0),

    VEND_INTERFACE_DSCR(2, 2, 0x50, 1),
    USB_BULK_EP_DSCR(0, 3U, USBDEV_MAX_PACKET_SIZE, 0),
    USB_BULK_EP_DSCR(1, 3U, USBDEV_MAX_PACKET_SIZE, 0),

    VEND_INTERFACE_DSCR(3, 2, 0x50, 1),
    USB_BULK_EP_DSCR(0, 4U, USBDEV_MAX_PACKET_SIZE, 0),
    USB_BULK_EP_DSCR(1, 4U, USBDEV_MAX_PACKET_SIZE, 0),

    VEND_INTERFACE_DSCR(4, 2, 0x50, 1),
    USB_BULK_EP_DSCR(0, 5U, USBDEV_MAX_PACKET_SIZE, 0),
    USB_BULK_EP_DSCR(1, 5U, USBDEV_MAX_PACKET_SIZE, 0),

    VEND_INTERFACE_DSCR(5, 2, 0x50, 1),
    USB_BULK_EP_DSCR(0, 6U, USBDEV_MAX_PACKET_SIZE, 0),
    USB_BULK_EP_DSCR(1, 6U, USBDEV_MAX_PACKET_SIZE, 0),

    VEND_INTERFACE_DSCR(6, 2, 0x50, 1),
    USB_BULK_EP_DSCR(0, 7U, USBDEV_MAX_PACKET_SIZE, 0),
    USB_BULK_EP_DSCR(1, 7U, USBDEV_MAX_PACKET_SIZE, 0),

    VEND_INTERFACE_DSCR(7, 2, 0x50, 1),
    USB_BULK_EP_DSCR(0, 8U, USBDEV_MAX_PACKET_SIZE, 0),
    USB_BULK_EP_DSCR(1, 8U, USBDEV_MAX_PACKET_SIZE, 0),

    VEND_INTERFACE_DSCR(8, 2, 0x50, 1),
    USB_BULK_EP_DSCR(0, 9U, USBDEV_MAX_PACKET_SIZE, 0),
    USB_BULK_EP_DSCR(1, 9U, USBDEV_MAX_PACKET_SIZE, 0),

    VEND_INTERFACE_DSCR(9, 2, 0x50, 1),
    USB_BULK_EP_DSCR(0, 10U, USBDEV_MAX_PACKET_SIZE, 0),
    USB_BULK_EP_DSCR(1, 10U, USBDEV_MAX_PACKET_SIZE, 0),

    VEND_INTERFACE_DSCR(10, 2, 0x50, 1),
    USB_BULK_EP_DSCR(0, 11U, USBDEV_MAX_PACKET_SIZE, 0),
    USB_BULK_EP_DSCR(1, 11U, USBDEV_MAX_PACKET_SIZE, 0),
};

/**
 * Test descriptor
 *
 * Passed to the control endpoint alongside the configuration descriptors.
 * The second argument carries the number of streams in its low bits, with the
 * upper nibble set.
 * NOTE(review): the meaning of the individual USB_TESTUTILS_TEST_DSCR
 * arguments is defined by usb_testutils and the host-side code; confirm there.
 */
static const uint8_t test_descriptor[] = {
    USB_TESTUTILS_TEST_DSCR(1, NUM_STREAMS | 0xF0U, 0, 0, 0)};

/**
 * USB device context types.
 *
 * `usbdev` is the usb_testutils state for the device and `usbdev_control` is
 * the state of the control endpoint (endpoint zero).
 */
static usb_testutils_ctx_t usbdev;
static usb_testutils_controlep_ctx_t usbdev_control;

/**
 * Pinmux handle
 */
static dif_pinmux_t pinmux;

/**
 * State information for streaming data test
 */
static usbdev_stream_test_ctx_t stream_test;

/**
 * Specify whether to perform verbose logging, for visibility
 *   (Note that this substantially alters the timing of interactions with the
 * DPI model and will increase the simulation time)
 */
static bool verbose = false;

/**
 * Send only maximal length packets?
 * (important for performance measurements on the USB, but obviously undesirable
 *  for testing reliability/function)
 * When false, the size of each data buffer is varied pseudo-randomly.
 */
static bool max_packets = false;

/**
 * Number of streams to be created
 */
static const unsigned nstreams = NUM_STREAMS;

/**
 * Diagnostic logging; expensive
 * (dumps the contents of buffers as hexadecimal)
 */
static bool log_traffic = false;

// Diagnostic aid: emit `n` bytes starting at `data` as a hex dump, formatted
// as single-byte words with 0x20 words per line and the default alphabet.
static void buffer_dump(const uint8_t *data, size_t n) {
  const base_hexdump_fmt_t byte_fmt = {
      .alphabet = &kBaseHexdumpDefaultFmtAlphabet,
      .words_per_line = 0x20u,
      .bytes_per_word = 1,
  };
  const char *bytes = (const char *)data;

  base_hexdump_with(byte_fmt, bytes, n);
}

// Write the signature record for stream `s` into the buffer `buf`.
//
// The record carries the head/tail signature words, the current transmission
// LFSR state, the stream number and the transfer length. Returns the number
// of bytes written to the buffer, which is always the size of the record.
static uint32_t buffer_sig_create(usbdev_stream_t *s,
                                  dif_usbdev_buffer_t *buf) {
  usbdev_stream_sig_t sig = {
      .head_sig = STREAM_SIGNATURE_HEAD,
      .init_lfsr = s->tx_lfsr,
      .stream = s->id,
      .reserved1 = 0U,
      .reserved2 = 0U,
      .num_bytes = s->transfer_bytes,
      .tail_sig = STREAM_SIGNATURE_TAIL,
  };
  size_t bytes_written;

  // The packed record is copied into the buffer byte-for-byte
  CHECK_DIF_OK(dif_usbdev_buffer_write(usbdev.dev, buf, (uint8_t *)&sig,
                                       sizeof(sig), &bytes_written));
  CHECK(bytes_written == sizeof(sig));

  // Note: the signature deliberately does not contribute to s->tx_bytes; it
  // is not included in the count of bytes transferred
  return (uint32_t)bytes_written;
}

// Write the next `num_bytes` of the LFSR-generated byte sequence of stream
// `s` into the buffer `buf`, advancing the transmission LFSR and the count of
// bytes presented for transmission.
static void buffer_fill(usbdev_stream_t *s, dif_usbdev_buffer_t *buf,
                        uint8_t num_bytes) {
  alignas(uint32_t) uint8_t data[USBDEV_MAX_PACKET_SIZE];
  size_t bytes_written;

  CHECK(num_bytes <= buf->remaining_bytes);
  CHECK(num_bytes <= sizeof(data));

  // Emit the LFSR-generated byte stream; this is kept brief to reduce our
  // latency in responding to USB events (usb_testutils employs polling at
  // present). Skipping the generation altogether, leaving the contents
  // undefined, is useful for profiling IN throughput on CW310 because the CPU
  // load at 10MHz can be an appreciable slowdown.
  uint8_t state = s->tx_lfsr;
  for (size_t idx = 0U; idx < num_bytes; ++idx) {
    data[idx] = state;
    state = LFSR_ADVANCE(state);
  }
  // Carry the generator state forward to the next packet
  s->tx_lfsr = state;

  if (verbose && log_traffic) {
    buffer_dump(data, num_bytes);
  }

  CHECK_DIF_OK(dif_usbdev_buffer_write(usbdev.dev, buf, data, num_bytes,
                                       &bytes_written));
  CHECK(bytes_written == num_bytes);
  s->tx_bytes += bytes_written;
}

// Verify a received packet against the expected byte sequence for stream `s`
// and hand the buffer back to the pool.
//
// Each received byte must equal the XOR of two LFSR-generated sequences: our
// own transmitted data (tracked by the shadow LFSR) and the sequence applied
// on the host side. Both LFSR states in the stream are advanced by the number
// of bytes checked.
static void buffer_check(usbdev_stream_test_ctx_t *ctx, usbdev_stream_t *s,
                         dif_usbdev_rx_packet_info_t packet_info,
                         dif_usbdev_buffer_t buf) {
  usb_testutils_ctx_t *usbdev = ctx->usbdev;
  uint8_t len = packet_info.length;

  if (len == 0) {
    // A zero-length data packet carries nothing to check, but its buffer
    // must still be returned to the pool
    CHECK_DIF_OK(
        dif_usbdev_buffer_return(usbdev->dev, usbdev->buffer_pool, &buf));
    return;
  }

  alignas(uint32_t) uint8_t data[USBDEV_MAX_PACKET_SIZE];
  size_t bytes_read;

  CHECK(len <= sizeof(data));

  // The buffer being read is USBDEV memory accessed as MMIO, so only the DIF
  // touches it directly. Consuming the final bytes of the read buffer
  // automatically returns it to the buffer pool.
  CHECK_DIF_OK(dif_usbdev_buffer_read(usbdev->dev, usbdev->buffer_pool, &buf,
                                      data, len, &bytes_read));
  CHECK(bytes_read == len);

  if (log_traffic) {
    buffer_dump(data, bytes_read);
  }

  // Compare against the expected byte stream; this loop is kept brief to
  // reduce our latency in responding to USB events (usb_testutils employs
  // polling at present)
  uint8_t tx_shadow = s->rxtx_lfsr;
  uint8_t host_lfsr = s->rx_lfsr;

  for (size_t idx = 0U; idx < bytes_read; ++idx) {
    const uint8_t received = data[idx];
    const uint8_t expected = tx_shadow ^ host_lfsr;

    CHECK(expected == received,
          "S%u: Unexpected received data 0x%02x : (LFSRs 0x%02x 0x%02x)",
          s->id, received, tx_shadow, host_lfsr);

    tx_shadow = LFSR_ADVANCE(tx_shadow);
    host_lfsr = LFSR_ADVANCE(host_lfsr);
  }

  // Remember both generator states for the next packet
  s->rxtx_lfsr = tx_shadow;
  s->rx_lfsr = host_lfsr;
}

// Transmission-complete callback for a stream endpoint.
//
// Drops the buffer at the head of the endpoint's queue, updates the queue
// counts and, if another buffer is waiting, presents it for transmission.
// `stream_v` is the stream pointer registered with the endpoint; `result` is
// not examined.
static void strm_tx_done(void *stream_v, usb_testutils_xfr_result_t result) {
  usbdev_stream_t *s = (usbdev_stream_t *)stream_v;
  usbdev_stream_test_ctx_t *ctx = s->ctx;
  usb_testutils_ctx_t *usbdev = ctx->usbdev;
  const uint8_t tx_ep = s->tx_ep;
  uint8_t nqueued = ctx->tx_bufs_queued[tx_ep];

  if (verbose) {
    LOG_INFO("strm_tx_done called. %u (%u total) buffers(s) are queued",
             nqueued, ctx->tx_queued_total);
  }

  // Without at least one queued buffer something has gone wrong and this
  // callback is inappropriate
  CHECK(nqueued > 0);
  if (nqueued == 0) {
    return;
  }

  // Note: buffer transmission and completion signalling both occur within
  // the foreground code (polling, not interrupt-driven), so there is no issue
  // of potential races here

  // Close the gap left by the transmitted buffer, moving each remaining
  // description down one slot (done by hand rather than with memmove)
  dif_usbdev_buffer_t *queue = ctx->tx_bufs[tx_ep];
  for (unsigned dst = 0u; dst + 1u < nqueued; dst++) {
    queue[dst] = queue[dst + 1u];
  }

  nqueued--;
  ctx->tx_bufs_queued[tx_ep] = nqueued;
  ctx->tx_queued_total--;

  // Is there another buffer ready to be transmitted?
  if (nqueued > 0) {
    CHECK_DIF_OK(
        dif_usbdev_send(usbdev->dev, tx_ep, &ctx->tx_bufs[tx_ep][0u]));
  }
}

// Callback for buffer reception
//
// `stream_v` is the stream pointer registered with the endpoint, `packet_info`
// describes the received packet and `buf` is the buffer holding its data.
// The packet must be a non-SETUP packet for the stream's reception endpoint;
// its contents are checked and its length added to the stream's received
// byte count.
static void strm_rx(void *stream_v, dif_usbdev_rx_packet_info_t packet_info,
                    dif_usbdev_buffer_t buf) {
  usbdev_stream_t *s = (usbdev_stream_t *)stream_v;
  usbdev_stream_test_ctx_t *ctx = s->ctx;

  CHECK(packet_info.endpoint == s->rx_ep);

  // We do not expect to receive SETUP packets to this endpoint
  CHECK(!packet_info.is_setup);

  if (verbose) {
    LOG_INFO("Stream %u: Received buffer of %u bytes(s)", s->id,
             packet_info.length);
  }

  if (true) {
    // Checking the data also returns the buffer to the pool
    buffer_check(ctx, s, packet_info, buf);
  } else {
    // Note: this is just test code for measuring the OUT throughput; the
    // buffer is returned to the pool without its contents being examined
    usb_testutils_ctx_t *usbdev = ctx->usbdev;
    CHECK_DIF_OK(
        dif_usbdev_buffer_return(usbdev->dev, usbdev->buffer_pool, &buf));
  }

  s->rx_bytes += packet_info.length;
}

// Reception callback installed on an IN-only endpoint, where no data is
// expected; it reads the packet out of its buffer, then reports and dumps the
// contents so that misdirected transfers are visible in the log.
static void rx_show(void *stream_v, dif_usbdev_rx_packet_info_t packet_info,
                    dif_usbdev_buffer_t buf) {
  // Only the stream pointer is supplied; reach the USB context through it
  usb_testutils_ctx_t *usbdev = ((usbdev_stream_t *)stream_v)->ctx->usbdev;

  size_t bytes_read;
  uint8_t data[0x100U];

  CHECK_DIF_OK(dif_usbdev_buffer_read(usbdev->dev, usbdev->buffer_pool, &buf,
                                      data, packet_info.length, &bytes_read));

  LOG_INFO("rx_show packet of %u byte(s) - read %u", packet_info.length,
           bytes_read);
  buffer_dump(data, bytes_read);
}

// Reports whether stream `s` has finished: true once both the transmitted and
// the received byte counts have reached the stream's transfer size.
bool stream_completed(const usbdev_stream_t *s) {
  if (s->tx_bytes < s->transfer_bytes) {
    // Still has data to present for transmission
    return false;
  }
  return s->rx_bytes >= s->transfer_bytes;
}

// Prepare stream `s` for use within the test context `ctx`.
//
// Records the stream identifier, endpoints and transfer size, resets the byte
// counts, seeds the LFSRs from the stream identifier and registers the stream
// callbacks with usb_testutils for endpoint(s) `ep_in` and `ep_out`.
static void stream_init(usbdev_stream_test_ctx_t *ctx, usbdev_stream_t *s,
                        uint8_t id, uint8_t ep_in, uint8_t ep_out,
                        uint32_t transfer_bytes) {
  // The strm_tx_done callback from usb_testutils receives only the stream
  // pointer, so the stream must be able to locate its test context
  s->ctx = ctx;
  s->id = id;
  s->tx_ep = ep_in;
  s->rx_ep = ep_out;

  // Nothing has been transferred yet, not even the stream signature
  s->sent_sig = false;
  s->transfer_bytes = transfer_bytes;
  s->tx_bytes = 0u;
  s->rx_bytes = 0u;

  // LFSR state for the transmission and reception sides:
  // - a simple LFSR generates the PRND stream that we transmit to the USBDPI
  // - the USBDPI XORs the received data with another LFSR-generated stream of
  //   its own, and transmits the result back to us
  // - to check the returned data, our reception code mimics both LFSRs
  s->rx_lfsr = USBDPI_LFSR_SEED(id);
  s->tx_lfsr = USBTST_LFSR_SEED(id);
  s->rxtx_lfsr = s->tx_lfsr;

  // Packet size randomization
  s->tx_buf_size = BUFSZ_LFSR_SEED(id);

  if (ep_in == ep_out) {
    // A single endpoint serves both directions: IN transfers (TO host) and
    // OUT transfers (FROM host)
    usb_testutils_endpoint_setup(ctx->usbdev, ep_in, kUsbdevOutStream, s,
                                 strm_tx_done, strm_rx, NULL, NULL);
  } else {
    // Endpoint for IN transfers (TO host); the rx_show handler is installed
    // to catch any misdirected data transfers
    usb_testutils_endpoint_setup(ctx->usbdev, ep_in, kUsbdevOutStream, s,
                                 strm_tx_done, rx_show, NULL, NULL);
    // Endpoint for OUT transfers (FROM host)
    usb_testutils_endpoint_setup(ctx->usbdev, ep_out, kUsbdevOutStream, s, NULL,
                                 strm_rx, NULL, NULL);
  }
}

// Service the given stream, preparing and/or sending any data that we can;
// data reception is handled via callbacks and requires no attention here
//
// At most one buffer is prepared per call. The first buffer of a stream
// carries the stream signature; subsequent buffers carry LFSR-generated data.
// A buffer is presented to the USB device immediately only if it is the sole
// buffer queued for its endpoint; otherwise it waits in the queue until the
// transmission-complete callback presents it.
static void stream_service(usbdev_stream_test_ctx_t *ctx, usbdev_stream_t *s) {
  // Use the USB context held by the test context, as the callbacks do
  usb_testutils_ctx_t *usbdev = ctx->usbdev;

  uint8_t tx_ep = s->tx_ep;
  uint8_t nqueued = ctx->tx_bufs_queued[tx_ep];

  // Generate output data as soon as possible and make it available for
  // collection by the host, but only if:
  if (s->tx_bytes >= s->transfer_bytes ||        // more bytes to transfer,
      nqueued >= ctx->tx_bufs_limit[tx_ep] ||    // endpoint allowed a buffer,
      ctx->tx_queued_total >= MAX_TX_BUFFERS) {  // total buffers not exceeded
    return;
  }

  // See whether we can populate another buffer yet
  dif_usbdev_buffer_t buf;
  dif_result_t dif_result =
      dif_usbdev_buffer_request(usbdev->dev, usbdev->buffer_pool, &buf);
  if (dif_result != kDifOk) {
    // If we have no more buffers available right now, continue polling...
    CHECK(dif_result == kDifUnavailable);
    return;
  }

  // This is just for reporting the number of buffers presented to the
  // USB device, as a progress indicator
  static unsigned bufs_sent = 0u;
  uint32_t num_bytes;

  if (s->sent_sig) {
    if (max_packets) {
      num_bytes = USBDEV_MAX_PACKET_SIZE;
    } else {
      // Vary the amount of data sent per buffer
      num_bytes = s->tx_buf_size % (USBDEV_MAX_PACKET_SIZE + 1u);
      s->tx_buf_size = LFSR_ADVANCE(s->tx_buf_size);
    }

    // Never send more than remains of the transfer
    uint32_t tx_left = s->transfer_bytes - s->tx_bytes;
    if (num_bytes > tx_left) {
      num_bytes = tx_left;
    }

    // num_bytes cannot exceed USBDEV_MAX_PACKET_SIZE at this point, and
    // buffer_fill itself checks the count against its packet-sized array
    buffer_fill(s, &buf, (uint8_t)num_bytes);
  } else {
    // Construct a signature to send to the host-side software,
    // identifying the stream and its properties
    num_bytes = buffer_sig_create(s, &buf);
    s->sent_sig = true;
  }

  // Remember the buffer until we're informed that it has been
  // successfully transmitted
  //
  // Note: since the 'tx_done' callback occurs from foreground code that
  // is polling, there is no issue of interrupt races here
  ctx->tx_bufs[tx_ep][nqueued] = buf;
  ctx->tx_bufs_queued[tx_ep] = ++nqueued;
  ctx->tx_queued_total++;

  // Can we present this buffer for transmission yet? Only if nothing else is
  // already queued for this endpoint
  if (nqueued <= 1U) {
    CHECK_DIF_OK(dif_usbdev_send(usbdev->dev, tx_ep, &buf));
  }

  if (verbose) {
    LOG_INFO("Stream %u: %uth buffer (of 0x%x byte(s)) awaiting transmission",
             s->id, bufs_sent, num_bytes);
  }
  bufs_sent++;
}

// Define the OTTF test configuration for this test; no arguments are supplied
OTTF_DEFINE_TEST_CONFIG();

// Test entry point.
//
// Brings up the USB device, waits for the host to configure it, then runs
// `nstreams` concurrent streams until each has both transmitted and received
// its share of the data. Received data is checked as it arrives; the total
// byte counts in both directions are verified at the end.
//
// Returns true on completion; any failure is reported through CHECK.
bool test_main(void) {
  // Context state for streaming test
  usbdev_stream_test_ctx_t *ctx = &stream_test;

  CHECK(kDeviceType == kDeviceSimVerilator || kDeviceType == kDeviceFpgaCw310,
        "This test is not expected to run on platforms other than the "
        "Verilator simulation or CW310 FPGA. It needs logic on the host side "
        "to retrieve, scramble and return the generated byte stream");

  LOG_INFO("Running USBDEV Stream Test");

  // Check we can support the requested number of streams
  CHECK(nstreams && nstreams < USBDEV_NUM_ENDPOINTS);

  // Decide upon the number of bytes to be transferred for the entire test
  uint32_t transfer_bytes = TRANSFER_BYTES_FPGA;
  if (kDeviceType == kDeviceSimVerilator) {
    transfer_bytes = TRANSFER_BYTES_VERILATOR;
  }
  // Share the total between the streams, rounding up
  transfer_bytes = (transfer_bytes + nstreams - 1) / nstreams;
  LOG_INFO(" - %u stream(s), 0x%x bytes each", nstreams, transfer_bytes);

  CHECK_DIF_OK(dif_pinmux_init(
      mmio_region_from_addr(TOP_EARLGREY_PINMUX_AON_BASE_ADDR), &pinmux));
  pinmux_testutils_init(&pinmux);
  CHECK_DIF_OK(dif_pinmux_input_select(
      &pinmux, kTopEarlgreyPinmuxPeripheralInUsbdevSense,
      kTopEarlgreyPinmuxInselIoc7));

  // Remember context state for usb_testutils context
  ctx->usbdev = &usbdev;

  // Call `usb_testutils_init` here so that DPI will not start until the
  // simulation has finished all of the printing, which takes a while
  // if `--trace` was passed in.
  usb_testutils_init(ctx->usbdev, /*pinflip=*/false, /*en_diff_rcvr=*/false,
                     /*tx_use_d_se0=*/false);
  usb_testutils_controlep_init(&usbdev_control, ctx->usbdev, 0,
                               config_descriptors, sizeof(config_descriptors),
                               test_descriptor, sizeof(test_descriptor));
  // Streaming cannot begin until the host has configured the device
  while (usbdev_control.device_state != kUsbTestutilsDeviceConfigured) {
    usb_testutils_poll(ctx->usbdev);
  }

  // Initialise the state of each stream
  for (unsigned id = 0U; id < nstreams; id++) {
    // Which endpoint are we using for the IN transfers to the host?
    const uint8_t ep_in = 1u + id;
    // Which endpoint are we using for the OUT transfers from the host?
    const uint8_t ep_out = 1u + id;
    stream_init(ctx, &ctx->streams[id], id, ep_in, ep_out, transfer_bytes);
  }

  // Decide how many buffers each endpoint may queue up for transmission;
  // we must ensure that there are buffers available for reception, and we
  // do not want any endpoint to starve another
  for (unsigned s = 0U; s < nstreams; s++) {
    // This is slightly overspending the available buffers, leaving the
    //   endpoints to vie for the final few buffers, so it's important that
    //   we limit the total number of buffers across all endpoints too
    unsigned ep = ctx->streams[s].tx_ep;
    ctx->tx_bufs_queued[ep] = 0U;
    ctx->tx_bufs_limit[ep] = (MAX_TX_BUFFERS + nstreams - 1) / nstreams;
  }
  ctx->tx_queued_total = 0U;

  if (verbose) {
    LOG_INFO("Commencing data transfer...");
  }

  bool done = false;
  do {
    for (unsigned s = 0U; s < nstreams; s++) {
      stream_service(ctx, &ctx->streams[s]);

      // We must keep polling regularly in order to handle detection of packet
      // transmission as well as perform packet reception and checking
      usb_testutils_poll(ctx->usbdev);
    }

    // See whether any streams still have more work to do; the scan stops at
    // the first stream that has not yet completed
    unsigned s = 0U;
    while (s < nstreams && stream_completed(&ctx->streams[s])) {
      s++;
    }
    done = (s >= nstreams);
  } while (!done);

  // Determine the total counts of bytes sent and received
  uint32_t tx_bytes = 0U;
  uint32_t rx_bytes = 0U;
  for (unsigned s = 0U; s < nstreams; s++) {
    tx_bytes += ctx->streams[s].tx_bytes;
    rx_bytes += ctx->streams[s].rx_bytes;
  }

  LOG_INFO("USB sent 0x%x byte(s), received and checked 0x%x byte(s)", tx_bytes,
           rx_bytes);

  // Every stream transfers the same number of bytes in each direction, so
  // both totals must match exactly; the completion test above only guarantees
  // that each count has reached its target, not that it was not exceeded
  const uint32_t expected_bytes = nstreams * transfer_bytes;
  CHECK(tx_bytes == expected_bytes,
        "Unexpected count of byte(s) sent to USB host");
  CHECK(rx_bytes == expected_bytes,
        "Unexpected count of byte(s) received from USB host");

  return true;
}
